#### Preprocess all of the data and run preText ####

# Set your working directory to the Data subdirectory of the replication repo
# (the load() calls below use file names relative to it). For us, this looks
# like:
setwd("~/Desktop/Replication_Materials/Data")

# Install preText only if it is not already available, so that re-running this
# script does not reinstall the package (and require network access) each time:
if (!requireNamespace("preText", quietly = TRUE)) {
    install.packages("preText")
}

# Load the package:
library(preText)


### Death Row Statements ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("Death_Row_Statements.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
death_row_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
death_row_preText <- preText(
    death_row_fact_prep,
    dataset_name = "Death Row Statements",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(death_row_preText,
                            remove_intercept = TRUE)


### Congressional Bills ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("House_Bills_113.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
cong_bills_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
cong_bills_preText <- preText(
    cong_bills_fact_prep,
    dataset_name = "Congressional Bills",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(cong_bills_preText,
                            remove_intercept = TRUE)


### Indian Treaties ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("Indian_Treaties.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
indian_treaties_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
indian_treaties_preText <- preText(
    indian_treaties_fact_prep,
    dataset_name = "Indian Treaties",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(indian_treaties_preText,
                            remove_intercept = TRUE)

# Generate preText score plot for each specification, as in the paper:
preText_score_plot(indian_treaties_preText)


### NYT Articles ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("NYT_Articles.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
nyt_articles_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
nyt_articles_preText <- preText(
    nyt_articles_fact_prep,
    dataset_name = "NYT Articles",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(nyt_articles_preText,
                            remove_intercept = TRUE)


### Press Releases ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("Press_Releases.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
press_releases_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
press_releases_preText <- preText(
    press_releases_fact_prep,
    dataset_name = "Press Releases",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(press_releases_preText,
                            remove_intercept = TRUE)


### SOTU Speeches ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("SOTU_Speeches.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
sotu_speeches_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
sotu_speeches_preText <- preText(
    sotu_speeches_fact_prep,
    dataset_name = "SOTU Speeches",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(sotu_speeches_preText,
                            remove_intercept = TRUE)


### Trump Tweets ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("Trump_Campaign_Tweets.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data:
trump_tweets_fact_prep <- factorial_preprocessing(documents)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
trump_tweets_preText <- preText(
    trump_tweets_fact_prep,
    dataset_name = "Trump Tweets",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(trump_tweets_preText,
                            remove_intercept = TRUE)


### UK Manifestos ###

# Load the data. load() returns the names of the objects it created; stop if
# `documents` is not among them, since otherwise a `documents` object already
# in the workspace would silently be used below:
loaded_objects <- load("UK_Manifestos.RData")
stopifnot("documents" %in% loaded_objects)

# Preprocess the data. Note that we pass a higher infrequent_term_threshold
# here than in the other sections (which do not set it): there are only 69
# documents, and we want to exclude terms that do not appear in at least two
# of them:
uk_manifestos_fact_prep <- factorial_preprocessing(documents,
                                infrequent_term_threshold = 0.02)

# Generate preText results (can change num_comparisons to replicate results for
# different numbers of comparisons in paper: 20, 50, 100):
uk_manifestos_preText <- preText(
    uk_manifestos_fact_prep,
    dataset_name = "UK Manifestos",
    num_comparisons = 100)

# Generate preText regression plot:
regression_coefficient_plot(uk_manifestos_preText,
                            remove_intercept = TRUE)



